% Prompt Injection
@misc{crothers2022machine,
    title={Machine Generated Text: A Comprehensive Survey of Threat Models and Detection Methods},
    author={Evan Crothers and Nathalie Japkowicz and Herna Viktor},
    year={2022},
    eprint={2210.07321},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}

@misc{nin2023new,
    title={New jailbreak based on virtual functions - smuggle illegal tokens to the backend.},
    author={u/Nin\_kat},
    year={2023},
    url={https://www.reddit.com/r/ChatGPT/comments/10urbdj/new_jailbreak_based_on_virtual_functions_smuggle}
}

@misc{kang2023exploiting,
    title={Exploiting Programmatic Behavior of LLMs: Dual-Use Through Standard Security Attacks},
    author={Daniel Kang and Xuechen Li and Ion Stoica and Carlos Guestrin and Matei Zaharia and Tatsunori Hashimoto},
    year={2023},
    eprint={2302.05733},
    archivePrefix={arXiv},
    primaryClass={cs.CR}
}

@misc{greshake2023youve,
    title={More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models},
    author={Kai Greshake and Sahar Abdelnabi and Shailesh Mishra and Christoph Endres and Thorsten Holz and Mario Fritz},
    year={2023},
    eprint={2302.12173},
    archivePrefix={arXiv},
    primaryClass={cs.CR}
}

@misc{kiho2023chatgpt,
    title={ChatGPT "DAN" (and other "Jailbreaks")},
    author={Kiho Lee},
    year={2023},
    url={https://github.com/0xk1h0/ChatGPT_DAN}
}

@misc{branch2022evaluating,
    title={Evaluating the Susceptibility of Pre-Trained Language Models via Handcrafted Adversarial Examples},
    author={Hezekiah J. Branch and Jonathan Rodriguez Cefalu and Jeremy McHugh and Leyla Hujer and Aditya Bahl and Daniel del Castillo Iglesias and Ron Heichman and Ramesh Darwishi},
    year={2022},
    eprint={2209.02128},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}

@misc{simon2022inject,
    title={Prompt injection attacks against GPT-3},
    author={Simon Willison},
    year={2022},
    month={Sep},
    url={https://simonwillison.net/2022/Sep/12/prompt-injection/}
}

@misc{goodside2022inject,
    title={Exploiting GPT-3 prompts with malicious inputs that order the model to ignore its previous directions},
    author={Riley Goodside},
    year={2022},
    url={https://twitter.com/goodside/status/1569128808308957185}
}

@misc{goodside2022history,
    title={History Correction},
    author={Riley Goodside},
    year={2023},
    url={https://twitter.com/goodside/status/1610110111791325188}
}

% prompt injection examples
@misc{chase2021adversarial,
    title={adversarial-prompts},
    author={Harrison Chase},
    year={2022},
    url={https://github.com/hwchase17/adversarial-prompts}
}

@misc{goodside2021gpt,
    title={GPT-3 Prompt Injection Defenses},
    author={Riley Goodside},
    year={2022},
    url={https://twitter.com/goodside/status/1578278974526222336}
}

% post prompting defense
@misc{christoph2022talking,
    author={Christoph Mark},
    title={Talking to machines: prompt engineering & injection},
    year={2022},
    month={Oct},
    day={3},
    url={https://artifact-research.com/artificial-intelligence/talking-to-machines-prompt-engineering-injection/}
}

% other defenses, eliezer
@misc{armstrong2022using,
    author={Stuart Armstrong and Rebecca Gorman},
    title={Using GPT-Eliezer against ChatGPT Jailbreaking},
    year={2022},
    month={Dec},
    day={6},
    url={https://www.alignmentforum.org/posts/pNcFYZnPdXyL2RfgA/using-gpt-eliezer-against-chatgpt-jailbreaking}
}

% reasonable recs
@misc{selvi2022exploring,
    author={Jose Selvi},
    title={Exploring Prompt Injection Attacks},
    year={2022},
    month={Dec},
    day={5},
    url={https://research.nccgroup.com/2022/12/05/exploring-prompt-injection-attacks/}
}

% Prompt Leaking
@misc{kevinbing,
    title={The entire prompt of Microsoft Bing Chat?! (Hi, Sydney.)},
    author={Kevin Liu},
    year={2023},
    url={https://twitter.com/kliu128/status/1623472922374574080}
}

% Jailbreaking Sources
@misc{perez2022jailbreak,
    title={Ignore Previous Prompt: Attack Techniques For Language Models},
    author={Perez, Fábio and Ribeiro, Ian},
    year={2022},
    eprint={2211.09527},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}

@misc{brundage_2022,
    title={Lessons Learned on Language Model Safety and Misuse},
    author={Brundage, Miles},
    year={2022},
    month={Mar},
    publisher={OpenAI},
    url={https://openai.com/blog/language-model-safety-and-misuse/}
}

@misc{wang2022jailbreak,
    title={Toxicity Detection with Generative Prompt-based Inference},
    author={Wang, Yau-Shian and Chang, Yingshan},
    year={2022},
    eprint={2205.12390},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}

@misc{alice2022jailbreak,
    title={ok I saw a few people jailbreaking safeguards openai put on chatgpt so I had to give it a shot myself},
    author={Alice Maz},
    year={2022},
    url={https://twitter.com/alicemazzy/status/1598288519301976064}
}

@misc{miguel2022jailbreak,
    title={Bypass @OpenAI's ChatGPT alignment efforts with this one weird trick},
    author={Miguel Piedrafita},
    year={2022},
    url={https://twitter.com/m1guelpf/status/1598203861294252033}
}

@misc{derek2022jailbreak,
    title={ChatGPT jailbreaking itself},
    author={Derek Parfait},
    year={2022},
    url={https://twitter.com/haus_cole/status/1598541468058390534}
}

@misc{nero2022jailbreak,
    title={Using "pretend" on \#ChatGPT can do some wild stuff. You can kind of get some insight on the future, alternative universe.},
    author={Nero Soares},
    year={2022},
    url={https://twitter.com/NeroSoares/status/1608527467265904643}
}

@misc{nick2022jailbreak,
    title={I kinda like this one even more!},
    author={Nick Moran},
    year={2022},
    url={https://twitter.com/NickEMoran/status/1598101579626057728}
}

@misc{sudo2022jailbreak,
    title={Sudo},
    author={Sudo},
    year={2022},
    url={https://www.sudo.ws/}
}

@misc{sam2022jailbreak,
    title={uh oh},
    author={samczsun},
    year={2022},
    url={https://twitter.com/samczsun/status/1598679658488217601}
}

@misc{jonas2022jailbreak,
    title={Building A Virtual Machine inside ChatGPT},
    author={Jonas Degrave},
    year={2022},
    month={Dec},
    publisher={Engraved},
    url={https://www.engraved.blog/building-a-virtual-machine-inside/}
}
